"""
    爬虫和反爬虫
        解决方案
            1、User-Agent：标识浏览器，参考浏览器大战
            2、代理IP
        解析数据：
            1、正则
            2、Xpath
            3、bs4
"""

import re
import requests
import pymysql
from pandas import DataFrame
# Scrape the link titles from the site's home page and persist them three
# ways: a plain-text file, an Excel workbook and a MySQL table.

# Send a browser-like User-Agent so the request looks like a regular browser.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
}

# Fetch the page. A timeout keeps the script from hanging forever on an
# unresponsive server, and raise_for_status() stops us from parsing (and
# saving) an HTTP error page as if it were real data.
response = requests.get("https://www.bqgda.cc/", headers=headers, timeout=10)
response.raise_for_status()
# When the server sends a text Content-Type without a charset, requests falls
# back to ISO-8859-1, which garbles non-Latin text. In that case use the
# encoding detected from the body instead.
if (response.encoding or "").lower() == "iso-8859-1":
    response.encoding = response.apparent_encoding
content = response.text

# Parse the data. re.S lets "." match newlines so a pattern can span lines.
# First collect the inner HTML of every <div class="l bd"> section, then pull
# the anchor text of each <li> entry out of the combined fragment.
new_content = "".join(re.findall('<div.*?class="l bd">(.*?)</div>', content, re.S))
titles = re.findall("<li>.*?<a.*?>(.*?)</a>", new_content, re.S)

# Save as text: one title per line.
with open("title.txt", "w", encoding="utf8") as f:
    f.writelines(title + "\n" for title in titles)

# Save as Excel: a single "titles" column, without the index column.
df = DataFrame({"titles": titles})
df.to_excel("titles.xlsx", index=False)

# Save to MySQL.
# NOTE(review): credentials are hard-coded here; consider loading them from
# the environment or a config file instead.
connect = pymysql.connect(
    host='localhost',
    user='root',
    password='123456',
    database='db_16',
)
try:
    # The cursor is closed automatically when the with-block exits.
    with connect.cursor() as cursor:
        sql = "insert into title (title) values(%s)"
        # Parameterized insert: one row per scraped title.
        cursor.executemany(sql, titles)
    connect.commit()
except Exception:
    # Undo any partial insert before propagating the error.
    connect.rollback()
    raise
finally:
    # Always release the connection, whether or not the insert succeeded.
    connect.close()

# https://www.yingdao.com/
